Goal/Purpose of operations:
Checking GTEx samples for confounders and performing exploratory data analysis
Finished pseudocode on:
210806
System which operations were done on:
Jen lab mac
GitHub Repo:
230321_JLF_Sex_bias_adverse_events
Directory of operations:
/home/rstudio
Scripts being edited for operations:
NA
Docker:
jenfisher7/rstudio_sex_bias_drugs
Data being used:
GTEx
GTEx_Analysis_v8_Annotations_SubjectPhenotypesDS.txt
GTEx_Analysis_v8_Annotations_SampleAttributesDS.txt
downloaded from https://gtexportal.org/home/datasets on 210517
Papers and tools:
Main- ggplot2 and tidyverse packages
additional tools starting at line 59.
setwd("/home/rstudio")
library(recount3)
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
library(rlang)
##
## Attaching package: 'rlang'
## The following object is masked from 'package:Biobase':
##
## exprs
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:Biobase':
##
## combine
## The following objects are masked from 'package:GenomicRanges':
##
## intersect, setdiff, union
## The following object is masked from 'package:GenomeInfoDb':
##
## intersect
## The following objects are masked from 'package:IRanges':
##
## collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
##
## first, intersect, rename, setdiff, setequal, union
## The following objects are masked from 'package:BiocGenerics':
##
## combine, intersect, setdiff, union
## The following object is masked from 'package:matrixStats':
##
## count
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(dbplyr)
##
## Attaching package: 'dbplyr'
## The following objects are masked from 'package:dplyr':
##
## ident, sql
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 1.0.0
## ✔ tibble 3.1.8 ✔ stringr 1.5.0
## ✔ tidyr 1.2.1 ✔ forcats 0.5.2
## ✔ readr 2.1.3
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ purrr::%@%() masks rlang::%@%()
## ✖ dplyr::collapse() masks IRanges::collapse()
## ✖ dplyr::combine() masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::count() masks matrixStats::count()
## ✖ dplyr::desc() masks IRanges::desc()
## ✖ tidyr::expand() masks S4Vectors::expand()
## ✖ rlang::exprs() masks Biobase::exprs()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks S4Vectors::first()
## ✖ purrr::flatten() masks rlang::flatten()
## ✖ purrr::flatten_chr() masks rlang::flatten_chr()
## ✖ purrr::flatten_dbl() masks rlang::flatten_dbl()
## ✖ purrr::flatten_int() masks rlang::flatten_int()
## ✖ purrr::flatten_lgl() masks rlang::flatten_lgl()
## ✖ purrr::flatten_raw() masks rlang::flatten_raw()
## ✖ dbplyr::ident() masks dplyr::ident()
## ✖ purrr::invoke() masks rlang::invoke()
## ✖ dplyr::lag() masks stats::lag()
## ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce() masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename() masks S4Vectors::rename()
## ✖ dplyr::slice() masks IRanges::slice()
## ✖ purrr::splice() masks rlang::splice()
## ✖ dbplyr::sql() masks dplyr::sql()
library(ggplot2)
library(viridis)
## Loading required package: viridisLite
library(RColorBrewer)
Get GTEx data from recount3
# Read the saved table of recount3 human projects from disk.
human_projects <- readRDS("~/data/human_recount3_projects.rds")

# Keep only the rows whose `file_source` is "gtex". which() discards NA
# comparisons, so rows with a missing `file_source` are dropped.
gtex_proj_info <- human_projects[
  which(human_projects[["file_source"]] == "gtex"), ,
  drop = FALSE
]

# For every remaining row, build an object with create_rse() and bind it in
# the current environment under "<value of column 1>_rse". This stays a
# top-level loop on purpose: assign() must write into the environment the
# script runs in so the generated objects are visible to later code.
for (i in seq_len(nrow(gtex_proj_info))) {
  name <- paste0(gtex_proj_info[i, 1], "_rse")
  assign(name, create_rse(gtex_proj_info[i, ]))
}
## 2023-03-27 13:51:52 downloading and reading the metadata.
## 2023-03-27 13:51:53 caching file gtex.gtex.ADIPOSE_TISSUE.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/UE/ADIPOSE_TISSUE/gtex.gtex.ADIPOSE_TISSUE.MD.gz'
## 2023-03-27 13:51:55 caching file gtex.recount_project.ADIPOSE_TISSUE.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/UE/ADIPOSE_TISSUE/gtex.recount_project.ADIPOSE_TISSUE.MD.gz'
## 2023-03-27 13:51:56 caching file gtex.recount_qc.ADIPOSE_TISSUE.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/UE/ADIPOSE_TISSUE/gtex.recount_qc.ADIPOSE_TISSUE.MD.gz'
## 2023-03-27 13:51:58 caching file gtex.recount_seq_qc.ADIPOSE_TISSUE.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/UE/ADIPOSE_TISSUE/gtex.recount_seq_qc.ADIPOSE_TISSUE.MD.gz'
## 2023-03-27 13:51:59 downloading and reading the feature information.
## 2023-03-27 13:51:59 caching file human.gene_sums.G026.gtf.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/annotations/gene_sums/human.gene_sums.G026.gtf.gz'
## 2023-03-27 13:52:01 downloading and reading the counts: 1293 samples across 63856 features.
## 2023-03-27 13:52:02 caching file gtex.gene_sums.ADIPOSE_TISSUE.G026.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/gene_sums/UE/ADIPOSE_TISSUE/gtex.gene_sums.ADIPOSE_TISSUE.G026.gz'
## 2023-03-27 13:52:22 construcing the RangedSummarizedExperiment (rse) object.
## 2023-03-27 13:52:22 downloading and reading the metadata.
## 2023-03-27 13:52:22 caching file gtex.gtex.MUSCLE.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/LE/MUSCLE/gtex.gtex.MUSCLE.MD.gz'
## 2023-03-27 13:52:24 caching file gtex.recount_project.MUSCLE.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/LE/MUSCLE/gtex.recount_project.MUSCLE.MD.gz'
## 2023-03-27 13:52:25 caching file gtex.recount_qc.MUSCLE.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/LE/MUSCLE/gtex.recount_qc.MUSCLE.MD.gz'
## 2023-03-27 13:52:27 caching file gtex.recount_seq_qc.MUSCLE.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/LE/MUSCLE/gtex.recount_seq_qc.MUSCLE.MD.gz'
## 2023-03-27 13:52:28 downloading and reading the feature information.
## 2023-03-27 13:52:29 caching file human.gene_sums.G026.gtf.gz.
## 2023-03-27 13:52:30 downloading and reading the counts: 881 samples across 63856 features.
## 2023-03-27 13:52:30 caching file gtex.gene_sums.MUSCLE.G026.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/gene_sums/LE/MUSCLE/gtex.gene_sums.MUSCLE.G026.gz'
## 2023-03-27 13:52:40 construcing the RangedSummarizedExperiment (rse) object.
## 2023-03-27 13:52:40 downloading and reading the metadata.
## 2023-03-27 13:52:40 caching file gtex.gtex.BLOOD_VESSEL.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/EL/BLOOD_VESSEL/gtex.gtex.BLOOD_VESSEL.MD.gz'
## 2023-03-27 13:52:42 caching file gtex.recount_project.BLOOD_VESSEL.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/EL/BLOOD_VESSEL/gtex.recount_project.BLOOD_VESSEL.MD.gz'
## 2023-03-27 13:52:43 caching file gtex.recount_qc.BLOOD_VESSEL.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/EL/BLOOD_VESSEL/gtex.recount_qc.BLOOD_VESSEL.MD.gz'
## 2023-03-27 13:52:44 caching file gtex.recount_seq_qc.BLOOD_VESSEL.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/EL/BLOOD_VESSEL/gtex.recount_seq_qc.BLOOD_VESSEL.MD.gz'
## 2023-03-27 13:52:45 downloading and reading the feature information.
## 2023-03-27 13:52:46 caching file human.gene_sums.G026.gtf.gz.
## 2023-03-27 13:52:46 downloading and reading the counts: 1398 samples across 63856 features.
## 2023-03-27 13:52:47 caching file gtex.gene_sums.BLOOD_VESSEL.G026.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/gene_sums/EL/BLOOD_VESSEL/gtex.gene_sums.BLOOD_VESSEL.G026.gz'
## 2023-03-27 13:53:05 construcing the RangedSummarizedExperiment (rse) object.
## 2023-03-27 13:53:05 downloading and reading the metadata.
## 2023-03-27 13:53:06 caching file gtex.gtex.HEART.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/RT/HEART/gtex.gtex.HEART.MD.gz'
## 2023-03-27 13:53:07 caching file gtex.recount_project.HEART.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/RT/HEART/gtex.recount_project.HEART.MD.gz'
## 2023-03-27 13:53:09 caching file gtex.recount_qc.HEART.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/RT/HEART/gtex.recount_qc.HEART.MD.gz'
## 2023-03-27 13:53:10 caching file gtex.recount_seq_qc.HEART.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/RT/HEART/gtex.recount_seq_qc.HEART.MD.gz'
## 2023-03-27 13:53:12 downloading and reading the feature information.
## 2023-03-27 13:53:12 caching file human.gene_sums.G026.gtf.gz.
## 2023-03-27 13:53:13 downloading and reading the counts: 942 samples across 63856 features.
## 2023-03-27 13:53:13 caching file gtex.gene_sums.HEART.G026.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/gene_sums/RT/HEART/gtex.gene_sums.HEART.G026.gz'
## 2023-03-27 13:53:25 construcing the RangedSummarizedExperiment (rse) object.
## 2023-03-27 13:53:25 downloading and reading the metadata.
## 2023-03-27 13:53:26 caching file gtex.gtex.OVARY.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/RY/OVARY/gtex.gtex.OVARY.MD.gz'
## 2023-03-27 13:53:27 caching file gtex.recount_project.OVARY.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/RY/OVARY/gtex.recount_project.OVARY.MD.gz'
## 2023-03-27 13:53:29 caching file gtex.recount_qc.OVARY.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/RY/OVARY/gtex.recount_qc.OVARY.MD.gz'
## 2023-03-27 13:53:31 caching file gtex.recount_seq_qc.OVARY.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/RY/OVARY/gtex.recount_seq_qc.OVARY.MD.gz'
## 2023-03-27 13:53:32 downloading and reading the feature information.
## 2023-03-27 13:53:33 caching file human.gene_sums.G026.gtf.gz.
## 2023-03-27 13:53:33 downloading and reading the counts: 195 samples across 63856 features.
## 2023-03-27 13:53:34 caching file gtex.gene_sums.OVARY.G026.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/gene_sums/RY/OVARY/gtex.gene_sums.OVARY.G026.gz'
## 2023-03-27 13:53:36 construcing the RangedSummarizedExperiment (rse) object.
## 2023-03-27 13:53:36 downloading and reading the metadata.
## 2023-03-27 13:53:37 caching file gtex.gtex.UTERUS.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/US/UTERUS/gtex.gtex.UTERUS.MD.gz'
## 2023-03-27 13:53:38 caching file gtex.recount_project.UTERUS.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/US/UTERUS/gtex.recount_project.UTERUS.MD.gz'
## 2023-03-27 13:53:40 caching file gtex.recount_qc.UTERUS.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/US/UTERUS/gtex.recount_qc.UTERUS.MD.gz'
## 2023-03-27 13:53:41 caching file gtex.recount_seq_qc.UTERUS.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/US/UTERUS/gtex.recount_seq_qc.UTERUS.MD.gz'
## 2023-03-27 13:53:42 downloading and reading the feature information.
## 2023-03-27 13:53:43 caching file human.gene_sums.G026.gtf.gz.
## 2023-03-27 13:53:44 downloading and reading the counts: 159 samples across 63856 features.
## 2023-03-27 13:53:44 caching file gtex.gene_sums.UTERUS.G026.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/gene_sums/US/UTERUS/gtex.gene_sums.UTERUS.G026.gz'
## 2023-03-27 13:53:47 construcing the RangedSummarizedExperiment (rse) object.
## 2023-03-27 13:53:47 downloading and reading the metadata.
## 2023-03-27 13:53:48 caching file gtex.gtex.VAGINA.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/NA/VAGINA/gtex.gtex.VAGINA.MD.gz'
## 2023-03-27 13:53:49 caching file gtex.recount_project.VAGINA.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/NA/VAGINA/gtex.recount_project.VAGINA.MD.gz'
## 2023-03-27 13:53:50 caching file gtex.recount_qc.VAGINA.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/NA/VAGINA/gtex.recount_qc.VAGINA.MD.gz'
## 2023-03-27 13:53:52 caching file gtex.recount_seq_qc.VAGINA.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/NA/VAGINA/gtex.recount_seq_qc.VAGINA.MD.gz'
## 2023-03-27 13:53:53 downloading and reading the feature information.
## 2023-03-27 13:53:53 caching file human.gene_sums.G026.gtf.gz.
## 2023-03-27 13:53:54 downloading and reading the counts: 173 samples across 63856 features.
## 2023-03-27 13:53:55 caching file gtex.gene_sums.VAGINA.G026.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/gene_sums/NA/VAGINA/gtex.gene_sums.VAGINA.G026.gz'
## 2023-03-27 13:53:57 construcing the RangedSummarizedExperiment (rse) object.
## 2023-03-27 13:53:57 downloading and reading the metadata.
## 2023-03-27 13:53:58 caching file gtex.gtex.BREAST.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/ST/BREAST/gtex.gtex.BREAST.MD.gz'
## 2023-03-27 13:53:59 caching file gtex.recount_project.BREAST.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/ST/BREAST/gtex.recount_project.BREAST.MD.gz'
## 2023-03-27 13:54:00 caching file gtex.recount_qc.BREAST.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/ST/BREAST/gtex.recount_qc.BREAST.MD.gz'
## 2023-03-27 13:54:02 caching file gtex.recount_seq_qc.BREAST.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/ST/BREAST/gtex.recount_seq_qc.BREAST.MD.gz'
## 2023-03-27 13:54:03 downloading and reading the feature information.
## 2023-03-27 13:54:04 caching file human.gene_sums.G026.gtf.gz.
## 2023-03-27 13:54:04 downloading and reading the counts: 482 samples across 63856 features.
## 2023-03-27 13:54:05 caching file gtex.gene_sums.BREAST.G026.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/gene_sums/ST/BREAST/gtex.gene_sums.BREAST.G026.gz'
## 2023-03-27 13:54:11 construcing the RangedSummarizedExperiment (rse) object.
## 2023-03-27 13:54:11 downloading and reading the metadata.
## 2023-03-27 13:54:12 caching file gtex.gtex.SKIN.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/IN/SKIN/gtex.gtex.SKIN.MD.gz'
## 2023-03-27 13:54:13 caching file gtex.recount_project.SKIN.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/IN/SKIN/gtex.recount_project.SKIN.MD.gz'
## 2023-03-27 13:54:15 caching file gtex.recount_qc.SKIN.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/IN/SKIN/gtex.recount_qc.SKIN.MD.gz'
## 2023-03-27 13:54:16 caching file gtex.recount_seq_qc.SKIN.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/IN/SKIN/gtex.recount_seq_qc.SKIN.MD.gz'
## 2023-03-27 13:54:18 downloading and reading the feature information.
## 2023-03-27 13:54:18 caching file human.gene_sums.G026.gtf.gz.
## 2023-03-27 13:54:19 downloading and reading the counts: 1940 samples across 63856 features.
## 2023-03-27 13:54:20 caching file gtex.gene_sums.SKIN.G026.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/gene_sums/IN/SKIN/gtex.gene_sums.SKIN.G026.gz'
## 2023-03-27 13:54:43 construcing the RangedSummarizedExperiment (rse) object.
## 2023-03-27 13:54:44 downloading and reading the metadata.
## 2023-03-27 13:54:44 caching file gtex.gtex.SALIVARY_GLAND.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/ND/SALIVARY_GLAND/gtex.gtex.SALIVARY_GLAND.MD.gz'
## 2023-03-27 13:54:46 caching file gtex.recount_project.SALIVARY_GLAND.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/ND/SALIVARY_GLAND/gtex.recount_project.SALIVARY_GLAND.MD.gz'
## 2023-03-27 13:54:47 caching file gtex.recount_qc.SALIVARY_GLAND.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/ND/SALIVARY_GLAND/gtex.recount_qc.SALIVARY_GLAND.MD.gz'
## 2023-03-27 13:54:48 caching file gtex.recount_seq_qc.SALIVARY_GLAND.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/ND/SALIVARY_GLAND/gtex.recount_seq_qc.SALIVARY_GLAND.MD.gz'
## 2023-03-27 13:54:51 downloading and reading the feature information.
## 2023-03-27 13:54:51 caching file human.gene_sums.G026.gtf.gz.
## 2023-03-27 13:54:52 downloading and reading the counts: 178 samples across 63856 features.
## 2023-03-27 13:54:52 caching file gtex.gene_sums.SALIVARY_GLAND.G026.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/gene_sums/ND/SALIVARY_GLAND/gtex.gene_sums.SALIVARY_GLAND.G026.gz'
## 2023-03-27 13:54:56 construcing the RangedSummarizedExperiment (rse) object.
## 2023-03-27 13:54:56 downloading and reading the metadata.
## 2023-03-27 13:54:56 caching file gtex.gtex.BRAIN.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/IN/BRAIN/gtex.gtex.BRAIN.MD.gz'
## 2023-03-27 13:54:58 caching file gtex.recount_project.BRAIN.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/IN/BRAIN/gtex.recount_project.BRAIN.MD.gz'
## 2023-03-27 13:54:59 caching file gtex.recount_qc.BRAIN.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/IN/BRAIN/gtex.recount_qc.BRAIN.MD.gz'
## 2023-03-27 13:55:01 caching file gtex.recount_seq_qc.BRAIN.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/IN/BRAIN/gtex.recount_seq_qc.BRAIN.MD.gz'
## 2023-03-27 13:55:02 downloading and reading the feature information.
## 2023-03-27 13:55:03 caching file human.gene_sums.G026.gtf.gz.
## 2023-03-27 13:55:04 downloading and reading the counts: 2931 samples across 63856 features.
## 2023-03-27 13:55:05 caching file gtex.gene_sums.BRAIN.G026.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/gene_sums/IN/BRAIN/gtex.gene_sums.BRAIN.G026.gz'
## 2023-03-27 13:55:39 construcing the RangedSummarizedExperiment (rse) object.
## 2023-03-27 13:55:39 downloading and reading the metadata.
## 2023-03-27 13:55:40 caching file gtex.gtex.ADRENAL_GLAND.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/ND/ADRENAL_GLAND/gtex.gtex.ADRENAL_GLAND.MD.gz'
## 2023-03-27 13:55:41 caching file gtex.recount_project.ADRENAL_GLAND.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/ND/ADRENAL_GLAND/gtex.recount_project.ADRENAL_GLAND.MD.gz'
## 2023-03-27 13:55:43 caching file gtex.recount_qc.ADRENAL_GLAND.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/ND/ADRENAL_GLAND/gtex.recount_qc.ADRENAL_GLAND.MD.gz'
## 2023-03-27 13:55:44 caching file gtex.recount_seq_qc.ADRENAL_GLAND.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/ND/ADRENAL_GLAND/gtex.recount_seq_qc.ADRENAL_GLAND.MD.gz'
## 2023-03-27 13:55:45 downloading and reading the feature information.
## 2023-03-27 13:55:46 caching file human.gene_sums.G026.gtf.gz.
## 2023-03-27 13:55:46 downloading and reading the counts: 274 samples across 63856 features.
## 2023-03-27 13:55:47 caching file gtex.gene_sums.ADRENAL_GLAND.G026.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/gene_sums/ND/ADRENAL_GLAND/gtex.gene_sums.ADRENAL_GLAND.G026.gz'
## 2023-03-27 13:55:51 construcing the RangedSummarizedExperiment (rse) object.
## 2023-03-27 13:55:51 downloading and reading the metadata.
## 2023-03-27 13:55:52 caching file gtex.gtex.THYROID.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/ID/THYROID/gtex.gtex.THYROID.MD.gz'
## 2023-03-27 13:55:53 caching file gtex.recount_project.THYROID.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/ID/THYROID/gtex.recount_project.THYROID.MD.gz'
## 2023-03-27 13:55:55 caching file gtex.recount_qc.THYROID.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/ID/THYROID/gtex.recount_qc.THYROID.MD.gz'
## 2023-03-27 13:55:56 caching file gtex.recount_seq_qc.THYROID.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/ID/THYROID/gtex.recount_seq_qc.THYROID.MD.gz'
## 2023-03-27 13:55:57 downloading and reading the feature information.
## 2023-03-27 13:55:58 caching file human.gene_sums.G026.gtf.gz.
## 2023-03-27 13:55:58 downloading and reading the counts: 706 samples across 63856 features.
## 2023-03-27 13:55:59 caching file gtex.gene_sums.THYROID.G026.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/gene_sums/ID/THYROID/gtex.gene_sums.THYROID.G026.gz'
## 2023-03-27 13:56:07 construcing the RangedSummarizedExperiment (rse) object.
## 2023-03-27 13:56:07 downloading and reading the metadata.
## 2023-03-27 13:56:08 caching file gtex.gtex.LUNG.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/NG/LUNG/gtex.gtex.LUNG.MD.gz'
## 2023-03-27 13:56:09 caching file gtex.recount_project.LUNG.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/NG/LUNG/gtex.recount_project.LUNG.MD.gz'
## 2023-03-27 13:56:10 caching file gtex.recount_qc.LUNG.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/NG/LUNG/gtex.recount_qc.LUNG.MD.gz'
## 2023-03-27 13:56:12 caching file gtex.recount_seq_qc.LUNG.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/NG/LUNG/gtex.recount_seq_qc.LUNG.MD.gz'
## 2023-03-27 13:56:13 downloading and reading the feature information.
## 2023-03-27 13:56:13 caching file human.gene_sums.G026.gtf.gz.
## 2023-03-27 13:56:14 downloading and reading the counts: 655 samples across 63856 features.
## 2023-03-27 13:56:15 caching file gtex.gene_sums.LUNG.G026.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/gene_sums/NG/LUNG/gtex.gene_sums.LUNG.G026.gz'
## 2023-03-27 13:56:21 construcing the RangedSummarizedExperiment (rse) object.
## 2023-03-27 13:56:21 downloading and reading the metadata.
## 2023-03-27 13:56:22 caching file gtex.gtex.SPLEEN.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/EN/SPLEEN/gtex.gtex.SPLEEN.MD.gz'
## 2023-03-27 13:56:23 caching file gtex.recount_project.SPLEEN.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/EN/SPLEEN/gtex.recount_project.SPLEEN.MD.gz'
## 2023-03-27 13:56:25 caching file gtex.recount_qc.SPLEEN.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/EN/SPLEEN/gtex.recount_qc.SPLEEN.MD.gz'
## 2023-03-27 13:56:26 caching file gtex.recount_seq_qc.SPLEEN.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/EN/SPLEEN/gtex.recount_seq_qc.SPLEEN.MD.gz'
## 2023-03-27 13:56:27 downloading and reading the feature information.
## 2023-03-27 13:56:28 caching file human.gene_sums.G026.gtf.gz.
## 2023-03-27 13:56:28 downloading and reading the counts: 255 samples across 63856 features.
## 2023-03-27 13:56:29 caching file gtex.gene_sums.SPLEEN.G026.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/gene_sums/EN/SPLEEN/gtex.gene_sums.SPLEEN.G026.gz'
## 2023-03-27 13:56:32 construcing the RangedSummarizedExperiment (rse) object.
## 2023-03-27 13:56:32 downloading and reading the metadata.
## 2023-03-27 13:56:33 caching file gtex.gtex.PANCREAS.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/AS/PANCREAS/gtex.gtex.PANCREAS.MD.gz'
## 2023-03-27 13:56:34 caching file gtex.recount_project.PANCREAS.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/AS/PANCREAS/gtex.recount_project.PANCREAS.MD.gz'
## 2023-03-27 13:56:36 caching file gtex.recount_qc.PANCREAS.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/AS/PANCREAS/gtex.recount_qc.PANCREAS.MD.gz'
## 2023-03-27 13:56:37 caching file gtex.recount_seq_qc.PANCREAS.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/AS/PANCREAS/gtex.recount_seq_qc.PANCREAS.MD.gz'
## 2023-03-27 13:56:38 downloading and reading the feature information.
## 2023-03-27 13:56:39 caching file human.gene_sums.G026.gtf.gz.
## 2023-03-27 13:56:39 downloading and reading the counts: 360 samples across 63856 features.
## 2023-03-27 13:56:40 caching file gtex.gene_sums.PANCREAS.G026.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/gene_sums/AS/PANCREAS/gtex.gene_sums.PANCREAS.G026.gz'
## 2023-03-27 13:56:44 construcing the RangedSummarizedExperiment (rse) object.
## 2023-03-27 13:56:44 downloading and reading the metadata.
## 2023-03-27 13:56:44 caching file gtex.gtex.ESOPHAGUS.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/US/ESOPHAGUS/gtex.gtex.ESOPHAGUS.MD.gz'
## 2023-03-27 13:56:46 caching file gtex.recount_project.ESOPHAGUS.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/US/ESOPHAGUS/gtex.recount_project.ESOPHAGUS.MD.gz'
## 2023-03-27 13:56:47 caching file gtex.recount_qc.ESOPHAGUS.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/US/ESOPHAGUS/gtex.recount_qc.ESOPHAGUS.MD.gz'
## 2023-03-27 13:56:49 caching file gtex.recount_seq_qc.ESOPHAGUS.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/US/ESOPHAGUS/gtex.recount_seq_qc.ESOPHAGUS.MD.gz'
## 2023-03-27 13:56:50 downloading and reading the feature information.
## 2023-03-27 13:56:50 caching file human.gene_sums.G026.gtf.gz.
## 2023-03-27 13:56:51 downloading and reading the counts: 1577 samples across 63856 features.
## 2023-03-27 13:56:51 caching file gtex.gene_sums.ESOPHAGUS.G026.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/gene_sums/US/ESOPHAGUS/gtex.gene_sums.ESOPHAGUS.G026.gz'
## 2023-03-27 13:57:12 construcing the RangedSummarizedExperiment (rse) object.
## 2023-03-27 13:57:12 downloading and reading the metadata.
## 2023-03-27 13:57:13 caching file gtex.gtex.STOMACH.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/CH/STOMACH/gtex.gtex.STOMACH.MD.gz'
## 2023-03-27 13:57:14 caching file gtex.recount_project.STOMACH.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/CH/STOMACH/gtex.recount_project.STOMACH.MD.gz'
## 2023-03-27 13:57:16 caching file gtex.recount_qc.STOMACH.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/CH/STOMACH/gtex.recount_qc.STOMACH.MD.gz'
## 2023-03-27 13:57:17 caching file gtex.recount_seq_qc.STOMACH.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/CH/STOMACH/gtex.recount_seq_qc.STOMACH.MD.gz'
## 2023-03-27 13:57:18 downloading and reading the feature information.
## 2023-03-27 13:57:19 caching file human.gene_sums.G026.gtf.gz.
## 2023-03-27 13:57:19 downloading and reading the counts: 384 samples across 63856 features.
## 2023-03-27 13:57:20 caching file gtex.gene_sums.STOMACH.G026.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/gene_sums/CH/STOMACH/gtex.gene_sums.STOMACH.G026.gz'
## 2023-03-27 13:57:25 construcing the RangedSummarizedExperiment (rse) object.
## 2023-03-27 13:57:25 downloading and reading the metadata.
## 2023-03-27 13:57:26 caching file gtex.gtex.COLON.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/ON/COLON/gtex.gtex.COLON.MD.gz'
## 2023-03-27 13:57:27 caching file gtex.recount_project.COLON.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/ON/COLON/gtex.recount_project.COLON.MD.gz'
## 2023-03-27 13:57:29 caching file gtex.recount_qc.COLON.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/ON/COLON/gtex.recount_qc.COLON.MD.gz'
## 2023-03-27 13:57:30 caching file gtex.recount_seq_qc.COLON.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/ON/COLON/gtex.recount_seq_qc.COLON.MD.gz'
## 2023-03-27 13:57:31 downloading and reading the feature information.
## 2023-03-27 13:57:32 caching file human.gene_sums.G026.gtf.gz.
## 2023-03-27 13:57:32 downloading and reading the counts: 822 samples across 63856 features.
## 2023-03-27 13:57:33 caching file gtex.gene_sums.COLON.G026.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/gene_sums/ON/COLON/gtex.gene_sums.COLON.G026.gz'
## 2023-03-27 13:57:42 construcing the RangedSummarizedExperiment (rse) object.
## 2023-03-27 13:57:42 downloading and reading the metadata.
## 2023-03-27 13:57:43 caching file gtex.gtex.SMALL_INTESTINE.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/NE/SMALL_INTESTINE/gtex.gtex.SMALL_INTESTINE.MD.gz'
## 2023-03-27 13:57:44 caching file gtex.recount_project.SMALL_INTESTINE.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/NE/SMALL_INTESTINE/gtex.recount_project.SMALL_INTESTINE.MD.gz'
## 2023-03-27 13:57:46 caching file gtex.recount_qc.SMALL_INTESTINE.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/NE/SMALL_INTESTINE/gtex.recount_qc.SMALL_INTESTINE.MD.gz'
## 2023-03-27 13:57:47 caching file gtex.recount_seq_qc.SMALL_INTESTINE.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/NE/SMALL_INTESTINE/gtex.recount_seq_qc.SMALL_INTESTINE.MD.gz'
## 2023-03-27 13:57:48 downloading and reading the feature information.
## 2023-03-27 13:57:49 caching file human.gene_sums.G026.gtf.gz.
## 2023-03-27 13:57:49 downloading and reading the counts: 193 samples across 63856 features.
## 2023-03-27 13:57:50 caching file gtex.gene_sums.SMALL_INTESTINE.G026.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/gene_sums/NE/SMALL_INTESTINE/gtex.gene_sums.SMALL_INTESTINE.G026.gz'
## 2023-03-27 13:57:53 construcing the RangedSummarizedExperiment (rse) object.
## 2023-03-27 13:57:53 downloading and reading the metadata.
## 2023-03-27 13:57:53 caching file gtex.gtex.PROSTATE.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/TE/PROSTATE/gtex.gtex.PROSTATE.MD.gz'
## 2023-03-27 13:57:55 caching file gtex.recount_project.PROSTATE.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/TE/PROSTATE/gtex.recount_project.PROSTATE.MD.gz'
## 2023-03-27 13:57:56 caching file gtex.recount_qc.PROSTATE.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/TE/PROSTATE/gtex.recount_qc.PROSTATE.MD.gz'
## 2023-03-27 13:57:57 caching file gtex.recount_seq_qc.PROSTATE.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/TE/PROSTATE/gtex.recount_seq_qc.PROSTATE.MD.gz'
## 2023-03-27 13:57:59 downloading and reading the feature information.
## 2023-03-27 13:57:59 caching file human.gene_sums.G026.gtf.gz.
## 2023-03-27 13:58:00 downloading and reading the counts: 263 samples across 63856 features.
## 2023-03-27 13:58:00 caching file gtex.gene_sums.PROSTATE.G026.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/gene_sums/TE/PROSTATE/gtex.gene_sums.PROSTATE.G026.gz'
## 2023-03-27 13:58:04 construcing the RangedSummarizedExperiment (rse) object.
## 2023-03-27 13:58:04 downloading and reading the metadata.
## 2023-03-27 13:58:04 caching file gtex.gtex.TESTIS.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/IS/TESTIS/gtex.gtex.TESTIS.MD.gz'
## 2023-03-27 13:58:06 caching file gtex.recount_project.TESTIS.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/IS/TESTIS/gtex.recount_project.TESTIS.MD.gz'
## 2023-03-27 13:58:07 caching file gtex.recount_qc.TESTIS.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/IS/TESTIS/gtex.recount_qc.TESTIS.MD.gz'
## 2023-03-27 13:58:09 caching file gtex.recount_seq_qc.TESTIS.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/IS/TESTIS/gtex.recount_seq_qc.TESTIS.MD.gz'
## 2023-03-27 13:58:10 downloading and reading the feature information.
## 2023-03-27 13:58:11 caching file human.gene_sums.G026.gtf.gz.
## 2023-03-27 13:58:12 downloading and reading the counts: 410 samples across 63856 features.
## 2023-03-27 13:58:12 caching file gtex.gene_sums.TESTIS.G026.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/gene_sums/IS/TESTIS/gtex.gene_sums.TESTIS.G026.gz'
## 2023-03-27 13:58:18 construcing the RangedSummarizedExperiment (rse) object.
## 2023-03-27 13:58:18 downloading and reading the metadata.
## 2023-03-27 13:58:18 caching file gtex.gtex.NERVE.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/VE/NERVE/gtex.gtex.NERVE.MD.gz'
## 2023-03-27 13:58:20 caching file gtex.recount_project.NERVE.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/VE/NERVE/gtex.recount_project.NERVE.MD.gz'
## 2023-03-27 13:58:21 caching file gtex.recount_qc.NERVE.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/VE/NERVE/gtex.recount_qc.NERVE.MD.gz'
## 2023-03-27 13:58:22 caching file gtex.recount_seq_qc.NERVE.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/VE/NERVE/gtex.recount_seq_qc.NERVE.MD.gz'
## 2023-03-27 13:58:24 downloading and reading the feature information.
## 2023-03-27 13:58:24 caching file human.gene_sums.G026.gtf.gz.
## 2023-03-27 13:58:25 downloading and reading the counts: 659 samples across 63856 features.
## 2023-03-27 13:58:25 caching file gtex.gene_sums.NERVE.G026.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/gene_sums/VE/NERVE/gtex.gene_sums.NERVE.G026.gz'
## 2023-03-27 13:58:32 construcing the RangedSummarizedExperiment (rse) object.
## 2023-03-27 13:58:32 downloading and reading the metadata.
## 2023-03-27 13:58:33 caching file gtex.gtex.PITUITARY.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/RY/PITUITARY/gtex.gtex.PITUITARY.MD.gz'
## 2023-03-27 13:58:34 caching file gtex.recount_project.PITUITARY.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/RY/PITUITARY/gtex.recount_project.PITUITARY.MD.gz'
## 2023-03-27 13:58:35 caching file gtex.recount_qc.PITUITARY.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/RY/PITUITARY/gtex.recount_qc.PITUITARY.MD.gz'
## 2023-03-27 13:58:37 caching file gtex.recount_seq_qc.PITUITARY.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/RY/PITUITARY/gtex.recount_seq_qc.PITUITARY.MD.gz'
## 2023-03-27 13:58:38 downloading and reading the feature information.
## 2023-03-27 13:58:39 caching file human.gene_sums.G026.gtf.gz.
## 2023-03-27 13:58:39 downloading and reading the counts: 301 samples across 63856 features.
## 2023-03-27 13:58:40 caching file gtex.gene_sums.PITUITARY.G026.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/gene_sums/RY/PITUITARY/gtex.gene_sums.PITUITARY.G026.gz'
## 2023-03-27 13:58:44 construcing the RangedSummarizedExperiment (rse) object.
## 2023-03-27 13:58:44 downloading and reading the metadata.
## 2023-03-27 13:58:44 caching file gtex.gtex.BLOOD.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/OD/BLOOD/gtex.gtex.BLOOD.MD.gz'
## 2023-03-27 13:58:46 caching file gtex.recount_project.BLOOD.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/OD/BLOOD/gtex.recount_project.BLOOD.MD.gz'
## 2023-03-27 13:58:47 caching file gtex.recount_qc.BLOOD.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/OD/BLOOD/gtex.recount_qc.BLOOD.MD.gz'
## 2023-03-27 13:58:49 caching file gtex.recount_seq_qc.BLOOD.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/OD/BLOOD/gtex.recount_seq_qc.BLOOD.MD.gz'
## 2023-03-27 13:58:50 downloading and reading the feature information.
## 2023-03-27 13:58:51 caching file human.gene_sums.G026.gtf.gz.
## 2023-03-27 13:58:51 downloading and reading the counts: 1048 samples across 63856 features.
## 2023-03-27 13:58:52 caching file gtex.gene_sums.BLOOD.G026.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/gene_sums/OD/BLOOD/gtex.gene_sums.BLOOD.G026.gz'
## 2023-03-27 13:59:02 construcing the RangedSummarizedExperiment (rse) object.
## 2023-03-27 13:59:02 downloading and reading the metadata.
## 2023-03-27 13:59:03 caching file gtex.gtex.LIVER.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/ER/LIVER/gtex.gtex.LIVER.MD.gz'
## 2023-03-27 13:59:04 caching file gtex.recount_project.LIVER.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/ER/LIVER/gtex.recount_project.LIVER.MD.gz'
## 2023-03-27 13:59:05 caching file gtex.recount_qc.LIVER.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/ER/LIVER/gtex.recount_qc.LIVER.MD.gz'
## 2023-03-27 13:59:07 caching file gtex.recount_seq_qc.LIVER.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/ER/LIVER/gtex.recount_seq_qc.LIVER.MD.gz'
## 2023-03-27 13:59:08 downloading and reading the feature information.
## 2023-03-27 13:59:08 caching file human.gene_sums.G026.gtf.gz.
## 2023-03-27 13:59:09 downloading and reading the counts: 251 samples across 63856 features.
## 2023-03-27 13:59:10 caching file gtex.gene_sums.LIVER.G026.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/gene_sums/ER/LIVER/gtex.gene_sums.LIVER.G026.gz'
## 2023-03-27 13:59:12 construcing the RangedSummarizedExperiment (rse) object.
## 2023-03-27 13:59:13 downloading and reading the metadata.
## 2023-03-27 13:59:13 caching file gtex.gtex.KIDNEY.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/EY/KIDNEY/gtex.gtex.KIDNEY.MD.gz'
## 2023-03-27 13:59:14 caching file gtex.recount_project.KIDNEY.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/EY/KIDNEY/gtex.recount_project.KIDNEY.MD.gz'
## 2023-03-27 13:59:16 caching file gtex.recount_qc.KIDNEY.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/EY/KIDNEY/gtex.recount_qc.KIDNEY.MD.gz'
## 2023-03-27 13:59:18 caching file gtex.recount_seq_qc.KIDNEY.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/EY/KIDNEY/gtex.recount_seq_qc.KIDNEY.MD.gz'
## 2023-03-27 13:59:19 downloading and reading the feature information.
## 2023-03-27 13:59:20 caching file human.gene_sums.G026.gtf.gz.
## 2023-03-27 13:59:20 downloading and reading the counts: 98 samples across 63856 features.
## 2023-03-27 13:59:21 caching file gtex.gene_sums.KIDNEY.G026.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/gene_sums/EY/KIDNEY/gtex.gene_sums.KIDNEY.G026.gz'
## 2023-03-27 13:59:23 construcing the RangedSummarizedExperiment (rse) object.
## 2023-03-27 13:59:23 downloading and reading the metadata.
## 2023-03-27 13:59:24 caching file gtex.gtex.CERVIX_UTERI.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/RI/CERVIX_UTERI/gtex.gtex.CERVIX_UTERI.MD.gz'
## 2023-03-27 13:59:26 caching file gtex.recount_project.CERVIX_UTERI.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/RI/CERVIX_UTERI/gtex.recount_project.CERVIX_UTERI.MD.gz'
## 2023-03-27 13:59:29 caching file gtex.recount_qc.CERVIX_UTERI.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/RI/CERVIX_UTERI/gtex.recount_qc.CERVIX_UTERI.MD.gz'
## 2023-03-27 13:59:31 caching file gtex.recount_seq_qc.CERVIX_UTERI.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/RI/CERVIX_UTERI/gtex.recount_seq_qc.CERVIX_UTERI.MD.gz'
## 2023-03-27 13:59:32 downloading and reading the feature information.
## 2023-03-27 13:59:32 caching file human.gene_sums.G026.gtf.gz.
## 2023-03-27 13:59:33 downloading and reading the counts: 19 samples across 63856 features.
## 2023-03-27 13:59:34 caching file gtex.gene_sums.CERVIX_UTERI.G026.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/gene_sums/RI/CERVIX_UTERI/gtex.gene_sums.CERVIX_UTERI.G026.gz'
## 2023-03-27 13:59:35 construcing the RangedSummarizedExperiment (rse) object.
## 2023-03-27 13:59:35 downloading and reading the metadata.
## 2023-03-27 13:59:36 caching file gtex.gtex.FALLOPIAN_TUBE.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/BE/FALLOPIAN_TUBE/gtex.gtex.FALLOPIAN_TUBE.MD.gz'
## 2023-03-27 13:59:37 caching file gtex.recount_project.FALLOPIAN_TUBE.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/BE/FALLOPIAN_TUBE/gtex.recount_project.FALLOPIAN_TUBE.MD.gz'
## 2023-03-27 13:59:39 caching file gtex.recount_qc.FALLOPIAN_TUBE.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/BE/FALLOPIAN_TUBE/gtex.recount_qc.FALLOPIAN_TUBE.MD.gz'
## 2023-03-27 13:59:40 caching file gtex.recount_seq_qc.FALLOPIAN_TUBE.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/BE/FALLOPIAN_TUBE/gtex.recount_seq_qc.FALLOPIAN_TUBE.MD.gz'
## 2023-03-27 13:59:42 downloading and reading the feature information.
## 2023-03-27 13:59:42 caching file human.gene_sums.G026.gtf.gz.
## 2023-03-27 13:59:43 downloading and reading the counts: 9 samples across 63856 features.
## 2023-03-27 13:59:43 caching file gtex.gene_sums.FALLOPIAN_TUBE.G026.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/gene_sums/BE/FALLOPIAN_TUBE/gtex.gene_sums.FALLOPIAN_TUBE.G026.gz'
## 2023-03-27 13:59:44 construcing the RangedSummarizedExperiment (rse) object.
## 2023-03-27 13:59:44 downloading and reading the metadata.
## 2023-03-27 13:59:45 caching file gtex.gtex.BLADDER.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/ER/BLADDER/gtex.gtex.BLADDER.MD.gz'
## 2023-03-27 13:59:46 caching file gtex.recount_project.BLADDER.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/ER/BLADDER/gtex.recount_project.BLADDER.MD.gz'
## 2023-03-27 13:59:48 caching file gtex.recount_qc.BLADDER.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/ER/BLADDER/gtex.recount_qc.BLADDER.MD.gz'
## 2023-03-27 13:59:49 caching file gtex.recount_seq_qc.BLADDER.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/ER/BLADDER/gtex.recount_seq_qc.BLADDER.MD.gz'
## 2023-03-27 13:59:50 downloading and reading the feature information.
## 2023-03-27 13:59:51 caching file human.gene_sums.G026.gtf.gz.
## 2023-03-27 13:59:51 downloading and reading the counts: 21 samples across 63856 features.
## 2023-03-27 13:59:52 caching file gtex.gene_sums.BLADDER.G026.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/gene_sums/ER/BLADDER/gtex.gene_sums.BLADDER.G026.gz'
## 2023-03-27 13:59:53 construcing the RangedSummarizedExperiment (rse) object.
## 2023-03-27 13:59:53 downloading and reading the metadata.
## 2023-03-27 13:59:54 caching file gtex.gtex.STUDY_NA.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/NA/STUDY_NA/gtex.gtex.STUDY_NA.MD.gz'
## 2023-03-27 13:59:55 caching file gtex.recount_project.STUDY_NA.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/NA/STUDY_NA/gtex.recount_project.STUDY_NA.MD.gz'
## 2023-03-27 13:59:56 caching file gtex.recount_qc.STUDY_NA.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/NA/STUDY_NA/gtex.recount_qc.STUDY_NA.MD.gz'
## 2023-03-27 13:59:58 caching file gtex.recount_seq_qc.STUDY_NA.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/NA/STUDY_NA/gtex.recount_seq_qc.STUDY_NA.MD.gz'
## 2023-03-27 13:59:59 downloading and reading the feature information.
## 2023-03-27 14:00:00 caching file human.gene_sums.G026.gtf.gz.
## 2023-03-27 14:00:00 downloading and reading the counts: 133 samples across 63856 features.
## 2023-03-27 14:00:03 caching file gtex.gene_sums.STUDY_NA.G026.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/gene_sums/NA/STUDY_NA/gtex.gene_sums.STUDY_NA.G026.gz'
## 2023-03-27 14:00:05 construcing the RangedSummarizedExperiment (rse) object.
## 2023-03-27 14:00:05 downloading and reading the metadata.
## 2023-03-27 14:00:05 caching file gtex.gtex.BONE_MARROW.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/OW/BONE_MARROW/gtex.gtex.BONE_MARROW.MD.gz'
## 2023-03-27 14:00:07 caching file gtex.recount_project.BONE_MARROW.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/OW/BONE_MARROW/gtex.recount_project.BONE_MARROW.MD.gz'
## 2023-03-27 14:00:08 caching file gtex.recount_qc.BONE_MARROW.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/OW/BONE_MARROW/gtex.recount_qc.BONE_MARROW.MD.gz'
## 2023-03-27 14:00:10 caching file gtex.recount_seq_qc.BONE_MARROW.MD.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/metadata/OW/BONE_MARROW/gtex.recount_seq_qc.BONE_MARROW.MD.gz'
## 2023-03-27 14:00:11 downloading and reading the feature information.
## 2023-03-27 14:00:11 caching file human.gene_sums.G026.gtf.gz.
## 2023-03-27 14:00:12 downloading and reading the counts: 204 samples across 63856 features.
## 2023-03-27 14:00:13 caching file gtex.gene_sums.BONE_MARROW.G026.gz.
## adding rname 'http://duffel.rail.bio/recount3/human/data_sources/gtex/gene_sums/OW/BONE_MARROW/gtex.gene_sums.BONE_MARROW.G026.gz'
## 2023-03-27 14:00:15 construcing the RangedSummarizedExperiment (rse) object.
# Stack the sample metadata (colData) of every per-project rse object into one
# table. Objects are looked up by name: "<value>_rse" for each value in the
# first column of gtex_proj_info.
name <- paste(gtex_proj_info[, 1], "rse", sep = "_")
# Reduce() binds the tables pairwise and in order, exactly like the former
# loop, but it also works when there is a single project: 2:length(name)
# would count down to c(2, 1) and fail on the missing second object.
gtex_coldata <- Reduce(
  rbind,
  lapply(name, function(rse_name) colData(get(rse_name)))
)
look at metadata of samples
# Work with a plain data.frame from here on, then count how many samples
# have no age value recorded (TRUE = missing).
gtex_coldata <- as.data.frame(gtex_coldata)
table(is.na(gtex_coldata[["gtex.age"]]))
##
## FALSE TRUE
## 19010 204
# Samples per age bracket (table() leaves out missing values by default).
table(gtex_coldata[["gtex.age"]])
##
## 20-29 30-39 40-49 50-59 60-69 70-79
## 1501 1428 2998 6089 6338 656
# How many samples have no sex value recorded (TRUE = missing)?
table(is.na(gtex_coldata[["gtex.sex"]]))
##
## FALSE TRUE
## 19010 204
# Samples per sex code (still the raw numeric coding at this point).
table(gtex_coldata[["gtex.sex"]])
##
## 1 2
## 12568 6442
Sex coding: 1 = male, 2 = female
# Samples per death-classification code (raw numeric coding).
table(gtex_coldata[["gtex.dthhrdy"]])
##
## 0 1 2 3 4
## 9700 740 5145 1020 2254
# How many samples have no death classification recorded (TRUE = missing)?
table(is.na(gtex_coldata[["gtex.dthhrdy"]]))
##
## FALSE TRUE
## 18859 355
Death classification (gtex.dthhrdy) coding: 0 = Ventilator case, 1 = Violent and fast death, 2 = Fast death of natural causes, 3 = Intermediate death, 4 = Slow death
How many sample per tissue?
# Sample count per tissue, shown as a two-column data.frame (Var1, Freq).
as.data.frame(table(gtex_coldata[["gtex.smts"]]))
## Var1 Freq
## 1 Adipose Tissue 1293
## 2 Adrenal Gland 274
## 3 Bladder 21
## 4 Blood 1048
## 5 Blood Vessel 1398
## 6 Bone Marrow 204
## 7 Brain 2931
## 8 Breast 482
## 9 Cervix Uteri 19
## 10 Colon 822
## 11 Esophagus 1577
## 12 Fallopian Tube 9
## 13 Heart 942
## 14 Kidney 98
## 15 Liver 251
## 16 Lung 655
## 17 Muscle 881
## 18 Nerve 659
## 19 Ovary 195
## 20 Pancreas 360
## 21 Pituitary 301
## 22 Prostate 263
## 23 Salivary Gland 178
## 24 Skin 1940
## 25 Small Intestine 193
## 26 Spleen 255
## 27 Stomach 384
## 28 Testis 410
## 29 Thyroid 706
## 30 Uterus 159
## 31 Vagina 173
substructures of tissues
# Sample count per tissue sub-structure, as a two-column data.frame.
as.data.frame(table(gtex_coldata[["gtex.smtsd"]]))
## Var1 Freq
## 1 Adipose - Subcutaneous 731
## 2 Adipose - Visceral (Omentum) 562
## 3 Adrenal Gland 274
## 4 Artery - Aorta 452
## 5 Artery - Coronary 253
## 6 Artery - Tibial 693
## 7 Bladder 21
## 8 Brain - Amygdala 163
## 9 Brain - Anterior cingulate cortex (BA24) 201
## 10 Brain - Caudate (basal ganglia) 273
## 11 Brain - Cerebellar Hemisphere 250
## 12 Brain - Cerebellum 285
## 13 Brain - Cortex 286
## 14 Brain - Frontal Cortex (BA9) 224
## 15 Brain - Hippocampus 220
## 16 Brain - Hypothalamus 221
## 17 Brain - Nucleus accumbens (basal ganglia) 262
## 18 Brain - Putamen (basal ganglia) 221
## 19 Brain - Spinal cord (cervical c-1) 171
## 20 Brain - Substantia nigra 154
## 21 Breast - Mammary Tissue 482
## 22 Cells - Cultured fibroblasts 520
## 23 Cells - EBV-transformed lymphocytes 196
## 24 Cells - Leukemia cell line (CML) 204
## 25 Cervix - Ectocervix 9
## 26 Cervix - Endocervix 10
## 27 Colon - Sigmoid 389
## 28 Colon - Transverse 433
## 29 Esophagus - Gastroesophageal Junction 399
## 30 Esophagus - Mucosa 625
## 31 Esophagus - Muscularis 553
## 32 Fallopian Tube 9
## 33 Heart - Atrial Appendage 450
## 34 Heart - Left Ventricle 492
## 35 Kidney - Cortex 94
## 36 Kidney - Medulla 4
## 37 Liver 251
## 38 Lung 655
## 39 Minor Salivary Gland 178
## 40 Muscle - Skeletal 881
## 41 Nerve - Tibial 659
## 42 Ovary 195
## 43 Pancreas 360
## 44 Pituitary 301
## 45 Prostate 263
## 46 Skin - Not Sun Exposed (Suprapubic) 639
## 47 Skin - Sun Exposed (Lower leg) 781
## 48 Small Intestine - Terminal Ileum 193
## 49 Spleen 255
## 50 Stomach 384
## 51 Testis 410
## 52 Thyroid 706
## 53 Uterus 159
## 54 Vagina 173
## 55 Whole Blood 852
adjust the data labels
# Recode the numeric sex codes (1 = male, anything else = female) as a factor
# with levels M, F. Missing values stay NA.
gtex_coldata$gtex.sex <- factor(
  ifelse(gtex_coldata$gtex.sex == 1, "M", "F"),
  levels = c("M", "F")
)

# Recode the numeric death classification (0-4) into descriptive labels.
# The previous nested ifelse() chain could never reach its "Not Reported"
# fallback for missing values: NA == k evaluates to NA, so ifelse() returned
# NA and downstream labels ended in "_NA". A lookup table plus an explicit NA
# replacement makes the fallback effective for missing and unknown codes.
death_labels <- c(
  "0" = "Ventilator_Case",
  "1" = "Violent_and_Fast_Death",
  "2" = "Fast_Death_of_Natural_Causes",
  "3" = "Intermediate_Death",
  "4" = "Slow_Death"
)
death_recode <- unname(death_labels[as.character(gtex_coldata$gtex.dthhrdy)])
death_recode[is.na(death_recode)] <- "Not Reported"
gtex_coldata$gtex.dthhrdy <- death_recode
look at age and sex
# Combined sex / age-bracket label per sample, joined with "_".
gtex_coldata$SEX_AGE <- with(
  gtex_coldata,
  paste(gtex.sex, gtex.age, sep = "_")
)
look at age, sex, and type of death
# Extend the sex/age label with the death classification, then tabulate how
# many samples fall into each combined group.
gtex_coldata$SEX_AGE_DEATH <- with(
  gtex_coldata,
  paste(SEX_AGE, gtex.dthhrdy, sep = "_")
)
table(gtex_coldata[["SEX_AGE_DEATH"]])
##
## F_20-29_Ventilator_Case F_20-29_Violent_and_Fast_Death
## 465 90
## F_30-39_Fast_Death_of_Natural_Causes F_30-39_Slow_Death
## 24 37
## F_30-39_Ventilator_Case F_30-39_Violent_and_Fast_Death
## 359 15
## F_40-49_Fast_Death_of_Natural_Causes F_40-49_Intermediate_Death
## 93 19
## F_40-49_NA F_40-49_Slow_Death
## 32 77
## F_40-49_Ventilator_Case F_40-49_Violent_and_Fast_Death
## 991 82
## F_50-59_Fast_Death_of_Natural_Causes F_50-59_Intermediate_Death
## 401 143
## F_50-59_NA F_50-59_Slow_Death
## 28 160
## F_50-59_Ventilator_Case F_50-59_Violent_and_Fast_Death
## 1077 65
## F_60-69_Fast_Death_of_Natural_Causes F_60-69_Intermediate_Death
## 492 162
## F_60-69_Slow_Death F_60-69_Ventilator_Case
## 560 862
## F_60-69_Violent_and_Fast_Death F_70-79_Fast_Death_of_Natural_Causes
## 19 54
## F_70-79_Intermediate_Death F_70-79_NA
## 11 7
## F_70-79_Slow_Death F_70-79_Ventilator_Case
## 59 58
## M_20-29_Fast_Death_of_Natural_Causes M_20-29_Intermediate_Death
## 59 50
## M_20-29_Ventilator_Case M_20-29_Violent_and_Fast_Death
## 747 90
## M_30-39_Fast_Death_of_Natural_Causes M_30-39_NA
## 81 13
## M_30-39_Slow_Death M_30-39_Ventilator_Case
## 61 721
## M_30-39_Violent_and_Fast_Death M_40-49_Fast_Death_of_Natural_Causes
## 117 372
## M_40-49_Intermediate_Death M_40-49_NA
## 15 5
## M_40-49_Slow_Death M_40-49_Ventilator_Case
## 170 1098
## M_40-49_Violent_and_Fast_Death M_50-59_Fast_Death_of_Natural_Causes
## 44 1586
## M_50-59_Intermediate_Death M_50-59_NA
## 95 24
## M_50-59_Slow_Death M_50-59_Ventilator_Case
## 325 2100
## M_50-59_Violent_and_Fast_Death M_60-69_Fast_Death_of_Natural_Causes
## 85 1843
## M_60-69_Intermediate_Death M_60-69_NA
## 495 39
## M_60-69_Slow_Death M_60-69_Ventilator_Case
## 700 1093
## M_60-69_Violent_and_Fast_Death M_70-79_Fast_Death_of_Natural_Causes
## 73 140
## M_70-79_Intermediate_Death M_70-79_NA
## 30 3
## M_70-79_Slow_Death M_70-79_Ventilator_Case
## 105 129
## M_70-79_Violent_and_Fast_Death NA_NA_NA
## 60 204
Remove the samples from one individual (GTEX-11ILO) who was identified in previous literature as having completed a sex change (Paulson et al. 2017). We also restrict the analysis to samples from the TruSeq.v1 chemistry.
# Step-wise sample filter; each intermediate table is kept under its own name.
# 1. drop samples without a gtex.smgebtcht value
gtex_coldata2 <- gtex_coldata[!is.na(gtex_coldata[["gtex.smgebtcht"]]), ]
# 2. keep only the TruSeq.v1 samples
gtex_coldata3 <- gtex_coldata2[
  gtex_coldata2[["gtex.smgebtcht"]] == "TruSeq.v1",
]
# 3. drop samples without a subject ID
gtex_coldata3 <- gtex_coldata3[!is.na(gtex_coldata3[["gtex.subjid"]]), ]
# 4. exclude every sample of subject GTEX-11ILO
gtex_coldata4 <- gtex_coldata3[
  gtex_coldata3[["gtex.subjid"]] != "GTEX-11ILO",
]
gtex_coldata <- gtex_coldata4
# Labels used by the per-tissue counting and plotting steps below:
# the distinct tissues, the distinct sub-tissues, and "<tissue>_data" names.
tissue <- unique(gtex_coldata[["gtex.smts"]])
sub_tissues <- unique(gtex_coldata[["gtex.smtsd"]])
tissue_data <- paste0(tissue, "_data")
# Contingency tables of sample counts per tissue (gtex.smts), in long format
# (one row per combination, count in Freq).
# Columns are selected by name instead of by position (formerly 14 = gtex.smts,
# 6 = gtex.sex, 7 = gtex.age, 8 = gtex.dthhrdy, 199 = SEX_AGE,
# 200 = SEX_AGE_DEATH) so the tables do not silently change if the column
# order of the metadata changes.
# Sex
sex_table <- as.data.frame(table(gtex_coldata[c("gtex.smts", "gtex.sex")]))
# Age
age_table <- as.data.frame(table(gtex_coldata[c("gtex.smts", "gtex.age")]))
# Death
death_table <- as.data.frame(
  table(gtex_coldata[c("gtex.smts", "gtex.dthhrdy")])
)
# AGE_Sex (combined label, and sex/age as separate factors)
age_sex_table <- as.data.frame(table(gtex_coldata[c("gtex.smts", "SEX_AGE")]))
age_sex_table_v2 <- as.data.frame(
  table(gtex_coldata[c("gtex.smts", "gtex.sex", "gtex.age")])
)
# AGE_Sex_Death (combined label, and the three factors separately)
age_sex_death_table <- as.data.frame(
  table(gtex_coldata[c("gtex.smts", "SEX_AGE_DEATH")])
)
age_sex_death_table_v2 <- as.data.frame(
  table(gtex_coldata[c("gtex.smts", "gtex.sex", "gtex.age", "gtex.dthhrdy")])
)
plot the number of samples for each sex across the tissues
# Stacked bars: sample count per tissue, split by sex. The plot is displayed
# and then written to PDF (ggsave() stores the most recent plot).
ggplot(sex_table, aes(x = gtex.smts, y = Freq, fill = gtex.sex)) +
  geom_col(position = "stack") +
  scale_fill_viridis(discrete = TRUE) +
  labs(title = "Sex for GTEx tissue", x = "Tissue", y = "Count") +
  theme_classic() +
  theme(
    axis.ticks = element_blank(),
    axis.text.x = element_text(angle = 90, size = 3.5)
  )
ggsave("~/results/GTEx_plots/210806_sex_tissue_GTEx.pdf")
## Saving 7 x 5 in image
plot the fraction of samples for each sex across the tissues (percentage)
# Bars normalised to 1: fraction of each sex within every tissue. The plot is
# displayed and then written to PDF (ggsave() stores the most recent plot).
ggplot(sex_table, aes(x = gtex.smts, y = Freq, fill = gtex.sex)) +
  geom_col(position = "fill") +
  scale_fill_viridis(discrete = TRUE) +
  labs(title = "Sex for GTEx tissue", x = "Tissue", y = "Fraction") +
  theme_classic() +
  theme(
    axis.ticks = element_blank(),
    axis.text.x = element_text(angle = 90, size = 3.5)
  )
ggsave("~/results/GTEx_plots/210806_sex_tissue_GTEx_percent.pdf")
## Saving 7 x 5 in image
plot the number of samples for each age groups across the tissues
# Stacked bars: sample count per tissue, split by age bracket. The plot is
# displayed and then written to PDF (ggsave() stores the most recent plot).
ggplot(age_table, aes(x = gtex.smts, y = Freq, fill = gtex.age)) +
  geom_col(position = "stack") +
  scale_fill_viridis(discrete = TRUE) +
  labs(title = "Age for GTEx tissue", x = "Tissue", y = "Count") +
  theme_classic() +
  theme(
    axis.ticks = element_blank(),
    axis.text.x = element_text(angle = 90, size = 3.5)
  )
ggsave("~/results/GTEx_plots/210806_age_tissue_GTEx.pdf")
## Saving 7 x 5 in image
plot the fraction of samples for each age group across the tissues (percentage)
# Bars normalised to 1: fraction of each age bracket within every tissue. The
# plot is displayed and then written to PDF (ggsave() stores the latest plot).
ggplot(age_table, aes(x = gtex.smts, y = Freq, fill = gtex.age)) +
  geom_col(position = "fill") +
  scale_fill_viridis(discrete = TRUE) +
  labs(title = "Age for GTEx tissue", x = "Tissue", y = "Fraction") +
  theme_classic() +
  theme(
    axis.ticks = element_blank(),
    axis.text.x = element_text(angle = 90, size = 3.5)
  )
ggsave("~/results/GTEx_plots/210806_age_tissue_GTEx_precent.pdf")
## Saving 7 x 5 in image
plot the number of samples for each sex/age groups across the tissues
# Stacked bars: sample count per tissue, split by combined sex/age label. The
# plot is displayed and then written to PDF (ggsave() stores the latest plot).
ggplot(age_sex_table, aes(x = gtex.smts, y = Freq, fill = SEX_AGE)) +
  geom_col(position = "stack") +
  scale_fill_viridis(discrete = TRUE) +
  labs(title = "Sex and Age for GTEx tissue", x = "Tissue", y = "Count") +
  theme_classic() +
  theme(
    axis.ticks = element_blank(),
    axis.text.x = element_text(angle = 90, size = 3.5)
  )
ggsave("~/results/GTEx_plots/210806_sex_age_tissue_GTEx.pdf")
## Saving 7 x 5 in image
plot the number of samples for each sex/ages across the tissues (percentage)
# Bars normalised to 1: fraction of each sex/age group within every tissue.
# The plot is displayed and then written to PDF (ggsave() stores the latest
# plot).
ggplot(age_sex_table, aes(x = gtex.smts, y = Freq, fill = SEX_AGE)) +
  geom_col(position = "fill") +
  scale_fill_viridis(discrete = TRUE) +
  labs(title = "Sex and Age for GTEx tissue", x = "Tissue", y = "Fraction") +
  theme_classic() +
  theme(
    axis.ticks = element_blank(),
    axis.text.x = element_text(angle = 90, size = 3.5)
  )
ggsave("~/results/GTEx_plots/210806_sex_age_tissue_GTEx_precent.pdf")
## Saving 7 x 5 in image
plot the fraction of samples for each type of death across the tissues (percentage)
# Bars normalised to 1: fraction of each death classification within every
# tissue. The plot is displayed and then written to PDF (ggsave() stores the
# latest plot).
ggplot(death_table, aes(x = gtex.smts, y = Freq, fill = gtex.dthhrdy)) +
  geom_col(position = "fill") +
  scale_fill_viridis(discrete = TRUE) +
  labs(title = "Type of Death for GTEx tissue", x = "Tissue", y = "Fraction") +
  theme_classic() +
  theme(
    axis.ticks = element_blank(),
    axis.text.x = element_text(angle = 90, size = 3.5)
  )
ggsave("~/results/GTEx_plots/210806_death_tissue_GTEx_precent.pdf")
## Saving 7 x 5 in image
what types of tissues do I have?
# List the tissue labels that remain after sample filtering.
print(tissue)
## [1] "Adipose Tissue" "Muscle" "Blood Vessel" "Heart"
## [5] "Ovary" "Uterus" "Vagina" "Breast"
## [9] "Skin" "Salivary Gland" "Brain" "Adrenal Gland"
## [13] "Thyroid" "Lung" "Spleen" "Pancreas"
## [17] "Esophagus" "Stomach" "Colon" "Small Intestine"
## [21] "Prostate" "Testis" "Nerve" "Pituitary"
## [25] "Blood" "Liver" "Kidney" "Cervix Uteri"
## [29] "Fallopian Tube" "Bladder"
for each tissue plot the sex and age
# For every tissue: plot the sample count per age bracket with one line per
# sex, display the figure, and save it as a PDF named after the tissue.
for (i in seq_along(tissue)) {
  # Rows of the tissue x sex x age count table belonging to this tissue.
  tissue_counts <- age_sex_table_v2[age_sex_table_v2$gtex.smts == tissue[i], ]
  tissue_plot <- ggplot(
    data = tissue_counts,
    aes(x = gtex.age, y = Freq, group = gtex.sex)
  ) +
    geom_line(aes(linetype = gtex.sex)) +
    geom_point() +
    theme_classic() +
    ggtitle(paste("Sex and Age for GTEx", tissue[i]))
  print(tissue_plot)
  out_file <- paste0(
    "~/results/GTEx_plots/210806_age_sex_GTEx_",
    tissue[i], ".pdf"
  )
  # Pass the plot explicitly so the saved figure does not depend on whatever
  # ggplot2 currently considers the "last plot".
  ggsave(out_file, plot = tissue_plot)
}
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
combine the results together
# One line per sex-by-tissue combination: sample count across age brackets,
# all tissues in a single panel. Displayed, then saved via ggsave().
ggplot(
  data = age_sex_table_v2,
  aes(x = gtex.age, y = Freq, group = interaction(gtex.sex, gtex.smts))
) +
  geom_line(aes(color = interaction(gtex.sex, gtex.smts))) +
  geom_point() +
  theme_classic() +
  labs(title = "Sex and Age GTEx Tissues")
ggsave("~/results/GTEx_plots/210806_sex_age_all_tissue_GTEx.pdf")
## Saving 7 x 5 in image
Look at the sub tissues
# Contingency tables of sample counts per sub-tissue (gtex.smtsd), in long
# format (one row per combination, count in Freq).
# Columns are selected by name instead of by position (formerly
# 15 = gtex.smtsd, 6 = gtex.sex, 7 = gtex.age, 8 = gtex.dthhrdy,
# 199 = SEX_AGE, 200 = SEX_AGE_DEATH) so the tables do not silently change if
# the column order of the metadata changes.
# Sex
sex_sub_table <- as.data.frame(
  table(gtex_coldata[c("gtex.smtsd", "gtex.sex")])
)
# Age
age_sub_table <- as.data.frame(
  table(gtex_coldata[c("gtex.smtsd", "gtex.age")])
)
# Death
death_sub_table <- as.data.frame(
  table(gtex_coldata[c("gtex.smtsd", "gtex.dthhrdy")])
)
# AGE_Sex (combined label, and sex/age as separate factors)
age_sex_sub_table <- as.data.frame(
  table(gtex_coldata[c("gtex.smtsd", "SEX_AGE")])
)
age_sex_sub_table_v2 <- as.data.frame(
  table(gtex_coldata[c("gtex.smtsd", "gtex.sex", "gtex.age")])
)
# AGE_Sex_Death (combined label, and the three factors separately)
age_sex_death_sub_table <- as.data.frame(
  table(gtex_coldata[c("gtex.smtsd", "SEX_AGE_DEATH")])
)
age_sex_death_sub_table_v2 <- as.data.frame(table(
  gtex_coldata[c("gtex.smtsd", "gtex.sex", "gtex.age", "gtex.dthhrdy")]
))
Plot the number of samples for each sex across the sub-tissue groups
# Stacked bars: sample count per sub-tissue, split by sex. The plot is
# displayed and then written to PDF (ggsave() stores the most recent plot).
ggplot(sex_sub_table, aes(fill = gtex.sex, y = Freq, x = gtex.smtsd)) +
  geom_bar(position = "stack", stat = "identity") +
  theme_classic() +
  xlab("Tissue") +
  ylab("Count") +
  scale_fill_viridis(discrete = TRUE) +
  theme(
    axis.text.x = element_text(angle = 90, size = 3.5),
    axis.ticks = element_blank()
  ) +
  ggtitle("Sex for GTEx tissue")
# Saved under GTEx_plots/ like the other figures of this analysis; the file
# previously landed one level up in ~/results/.
ggsave("~/results/GTEx_plots/210806_sex_tissue_structures_GTEx.pdf")
## Saving 7 x 5 in image
Plot the fraction of samples for each sex across the sub-tissue groups (percentage)
# Bars normalised to 1: fraction of each sex within every sub-tissue. The plot
# is displayed and then written to PDF (ggsave() stores the latest plot).
ggplot(sex_sub_table, aes(x = gtex.smtsd, y = Freq, fill = gtex.sex)) +
  geom_col(position = "fill") +
  scale_fill_viridis(discrete = TRUE) +
  labs(title = "Sex for GTEx tissue", x = "Tissue", y = "Fraction") +
  theme_classic() +
  theme(
    axis.ticks = element_blank(),
    axis.text.x = element_text(angle = 90, size = 3.5)
  )
ggsave("~/results/GTEx_plots/210806_sex_tissue_structures_GTEx_precent.pdf")
## Saving 7 x 5 in image
Plot the number of samples for each age group across the sub-tissue groups
# Stacked bars: sample count per sub-tissue, split by age bracket. The plot is
# displayed and then written to PDF (ggsave() stores the latest plot).
ggplot(age_sub_table, aes(x = gtex.smtsd, y = Freq, fill = gtex.age)) +
  geom_col(position = "stack") +
  scale_fill_viridis(discrete = TRUE) +
  labs(title = "Age for GTEx tissue", x = "Tissue", y = "Count") +
  theme_classic() +
  theme(
    axis.ticks = element_blank(),
    axis.text.x = element_text(angle = 90, size = 3.5)
  )
ggsave("~/results/GTEx_plots/210806_age_tissue_structures_GTEx.pdf")
## Saving 7 x 5 in image
Plot the fraction of samples for each age group across the sub-tissue groups
# Filled bars: fraction of samples of each age group within every sub-tissue.
ggplot(age_sub_table, aes(x = gtex.smtsd, y = Freq, fill = gtex.age)) +
  geom_bar(stat = "identity", position = "fill") +
  scale_fill_viridis(discrete = TRUE) +
  labs(x = "Tissue", y = "Fraction", title = "Age for GTEx tissue") +
  theme_classic() +
  theme(
    axis.ticks = element_blank(),
    axis.text.x = element_text(size = 3.5, angle = 90)
  )
# The fraction plot previously reused the count plot's file name (without the
# "_precent" suffix the other fraction plots carry) and was written outside
# GTEx_plots/. Use the same directory and suffix as its siblings.
ggsave("~/results/GTEx_plots/210806_age_tissue_structures_GTEx_precent.pdf")
## Saving 7 x 5 in image
Plot the number of samples for each sex/age across the sub-tissue groups
# Stacked bars: sample count per sub-tissue, split by combined sex/age label.
ggplot(age_sex_sub_table, aes(x = gtex.smtsd, y = Freq, fill = SEX_AGE)) +
  geom_bar(stat = "identity", position = "stack") +
  scale_fill_viridis(discrete = TRUE) +
  labs(x = "Tissue", y = "Count", title = "Sex and Age for GTEx tissue") +
  theme_classic() +
  theme(
    axis.ticks = element_blank(),
    axis.text.x = element_text(size = 3.5, angle = 90)
  )
ggsave(
  filename = "~/results/GTEx_plots/210806_sex_age_tissue_structures_GTEx.pdf"
)
## Saving 7 x 5 in image
Plot the fraction of samples for each sex/age group across the sub-tissue groups
# Filled bars: fraction of each combined sex/age label within every sub-tissue.
ggplot(age_sex_sub_table, aes(x = gtex.smtsd, y = Freq, fill = SEX_AGE)) +
  geom_bar(stat = "identity", position = "fill") +
  scale_fill_viridis(discrete = TRUE) +
  labs(x = "Tissue", y = "Fraction", title = "Sex and Age for GTEx tissue") +
  theme_classic() +
  theme(
    axis.ticks = element_blank(),
    axis.text.x = element_text(size = 3.5, angle = 90)
  )
ggsave(
  filename =
    "~/results/GTEx_plots/210806_sex_age_tissue_structures_GTEx_precent.pdf"
)
## Saving 7 x 5 in image
Plot the fraction of samples for each type of death across the sub-tissue groups
# Filled bars: fraction of each death classification (gtex.dthhrdy) within
# every sub-tissue.
ggplot(death_sub_table, aes(x = gtex.smtsd, y = Freq, fill = gtex.dthhrdy)) +
  geom_bar(stat = "identity", position = "fill") +
  scale_fill_viridis(discrete = TRUE) +
  labs(x = "Tissue", y = "Fraction", title = "Type of Death for GTEx tissue") +
  theme_classic() +
  theme(
    axis.ticks = element_blank(),
    axis.text.x = element_text(size = 3.5, angle = 90)
  )
ggsave(
  filename =
    "~/results/GTEx_plots/210806_death_tissue_structures_GTEx_precent.pdf"
)
## Saving 7 x 5 in image
Plot the RIN score for each sex across the tissue groups
# Box plots of RIN (gtex.smrin) per tissue (gtex.smts), side by side per sex.
# The dashed red line marks RIN = 5, the cut-off applied when filtering samples.
ggplot(gtex_coldata, aes(x = gtex.smts, y = gtex.smrin, fill = gtex.sex)) +
  geom_boxplot(position = position_dodge(width = 1)) +
  geom_hline(yintercept = 5, color = "red", linetype = "dashed") +
  scale_fill_viridis(discrete = TRUE) +
  labs(x = "Tissue", y = "RIN Score", title = "RIN for GTEx tissue") +
  theme_classic() +
  theme(
    axis.ticks = element_blank(),
    axis.text.x = element_text(size = 3.5, angle = 90)
  )
# Saved under GTEx_plots/ like the other figures of this report (it was
# previously written one directory up, in ~/results/).
ggsave("~/results/GTEx_plots/210806_RIN_tissue_GTEx.pdf")
## Saving 7 x 5 in image
Plot the autolysis score.
The autolysis score was assigned by a pathologist during a visual inspection of the histology image. The assigned values ranged from 0 to 3 (None, Mild, Moderate, and Severe).
# Count samples per tissue (column 14, gtex.smts) and autolysis score
# (column 10, gtex.smatsscr), then show the score mix per tissue as fractions.
autolysis_score <- as.data.frame(table(gtex_coldata[, c(14, 10)]))
ggplot(autolysis_score, aes(x = gtex.smts, y = Freq, fill = gtex.smatsscr)) +
  geom_bar(stat = "identity", position = "fill") +
  scale_fill_viridis(discrete = TRUE) +
  labs(
    x = "Tissue",
    y = "Fraction",
    title = "Autolysis Score for GTEx tissue"
  ) +
  theme_classic() +
  theme(
    axis.ticks = element_blank(),
    axis.text.x = element_text(size = 3.5, angle = 90)
  )
## Warning: Removed 4 rows containing missing values (`geom_bar()`).
ggsave("~/results/GTEx_plots/210806_autolysis_score_tissue_GTEx_precent.pdf")
## Saving 7 x 5 in image
## Warning: Removed 4 rows containing missing values (`geom_bar()`).
Plot the total number of reads aligned/map
# Box plots of gtex.smmppd per tissue (gtex.smts), side by side per sex.
ggplot(gtex_coldata, aes(x = gtex.smts, y = gtex.smmppd, fill = gtex.sex)) +
  geom_boxplot(position = position_dodge(width = 1)) +
  scale_fill_viridis(discrete = TRUE) +
  labs(
    x = "Tissue",
    y = "Total number of reads aligned/map",
    title = "Total number of reads aligned/map for GTEx tissue"
  ) +
  theme_classic() +
  theme(
    axis.ticks = element_blank(),
    axis.text.x = element_text(size = 3.5, angle = 90)
  )
ggsave(filename = "~/results/GTEx_plots/210806_ALIGNED_tissue_GTEx.pdf")
## Saving 7 x 5 in image
Plot the ischemic time for the samples
# Box plots of ischemic time (gtex.smtsisch) per tissue, side by side per sex.
ggplot(gtex_coldata, aes(x = gtex.smts, y = gtex.smtsisch, fill = gtex.sex)) +
  geom_boxplot(position = position_dodge(width = 1)) +
  scale_fill_viridis(discrete = TRUE) +
  labs(
    x = "Tissue",
    y = "Ischemic Time",
    title = "Ischemic Time for GTEx tissue"
  ) +
  theme_classic() +
  theme(
    axis.ticks = element_blank(),
    axis.text.x = element_text(size = 3.5, angle = 90)
  )
## Warning: Removed 34 rows containing non-finite values (`stat_boxplot()`).
ggsave("~/results/GTEx_plots/210806_Ischemic_Time_tissue_GTEx.pdf")
## Saving 7 x 5 in image
## Warning: Removed 34 rows containing non-finite values (`stat_boxplot()`).
Plot the mapping rate for the samples
# Box plots of mapping rate (gtex.smmaprt) per tissue, side by side per sex.
ggplot(gtex_coldata, aes(x = gtex.smts, y = gtex.smmaprt, fill = gtex.sex)) +
  geom_boxplot(position = position_dodge(width = 1)) +
  scale_fill_viridis(discrete = TRUE) +
  labs(
    x = "Tissue",
    y = "Mapping Rate",
    title = "Mapping Rate for GTEx tissue"
  ) +
  theme_classic() +
  theme(
    axis.ticks = element_blank(),
    axis.text.x = element_text(size = 3.5, angle = 90)
  )
ggsave(filename = "~/results/GTEx_plots/210806_mapping_rate_tissue_GTEx.pdf")
## Saving 7 x 5 in image
look at the sub tissue groups
Plot the RIN score
High-quality RNA will contain an RIN of at least 8, where partially fragmented RNA will contain an RIN within the range of 6–8. Any RNA sample that has a RIN below 5 should not be subjected to further fragmentation during the ScriptSeq protocol, as it will generate smaller than desired fragments.
# Box plots of RIN (gtex.smrin) per sub-tissue (gtex.smtsd), side by side per
# sex. The dashed red line marks RIN = 5.
ggplot(gtex_coldata, aes(x = gtex.smtsd, y = gtex.smrin, fill = gtex.sex)) +
  geom_boxplot(position = position_dodge(width = 1)) +
  geom_hline(yintercept = 5, color = "red", linetype = "dashed") +
  scale_fill_viridis(discrete = TRUE) +
  labs(x = "Tissue", y = "RIN Score", title = "RIN for GTEx tissue") +
  theme_classic() +
  theme(
    axis.ticks = element_blank(),
    axis.text.x = element_text(size = 3.5, angle = 90)
  )
ggsave(filename = "~/results/GTEx_plots/210806_RIN_tissue_structures_GTEx.pdf")
## Saving 7 x 5 in image
Plot the autolysis score
# Count samples per sub-tissue (column 15, gtex.smtsd) and autolysis score
# (column 10, gtex.smatsscr), then show the score mix per sub-tissue as
# fractions. This overwrites the tissue-level autolysis_score table.
autolysis_score <- as.data.frame(table(gtex_coldata[, c(15, 10)]))
ggplot(autolysis_score, aes(x = gtex.smtsd, y = Freq, fill = gtex.smatsscr)) +
  geom_bar(stat = "identity", position = "fill") +
  scale_fill_viridis(discrete = TRUE) +
  labs(
    x = "Tissue",
    y = "Fraction",
    title = "Autolysis Score for GTEx tissue"
  ) +
  theme_classic() +
  theme(
    axis.ticks = element_blank(),
    axis.text.x = element_text(size = 3.5, angle = 90)
  )
## Warning: Removed 56 rows containing missing values (`geom_bar()`).
ggsave(
"~/results/GTEx_plots/210806_autolysis_score_tissue_structure_GTEx_precent.pdf"
)
## Saving 7 x 5 in image
## Warning: Removed 56 rows containing missing values (`geom_bar()`).
Plot the number of reads aligned
# Box plots of gtex.smmppd per sub-tissue (gtex.smtsd), side by side per sex.
ggplot(gtex_coldata, aes(x = gtex.smtsd, y = gtex.smmppd, fill = gtex.sex)) +
  geom_boxplot(position = position_dodge(width = 1)) +
  scale_fill_viridis(discrete = TRUE) +
  labs(
    x = "Tissue",
    y = "Total number of reads aligned/map",
    title = "Total number of reads aligned/map for GTEx tissue"
  ) +
  theme_classic() +
  theme(
    axis.ticks = element_blank(),
    axis.text.x = element_text(size = 3.5, angle = 90)
  )
ggsave(
  filename = "~/results/GTEx_plots/210806_aligned_tissue_structures_GTEx.pdf"
)
## Saving 7 x 5 in image
plot the ischemic time Interval between actual death, presumed death, or cross clamp application and final tissue stabilization
# Box plots of ischemic time (gtex.smtsisch) per sub-tissue, side by side per
# sex.
ggplot(gtex_coldata, aes(x = gtex.smtsd, y = gtex.smtsisch, fill = gtex.sex)) +
  geom_boxplot(position = position_dodge(width = 1)) +
  scale_fill_viridis(discrete = TRUE) +
  labs(
    x = "Tissue",
    y = "Ischemic Time (mins)",
    title = "Ischemic Time for GTEx tissue"
  ) +
  theme_classic() +
  theme(
    axis.ticks = element_blank(),
    axis.text.x = element_text(size = 3.5, angle = 90)
  )
## Warning: Removed 34 rows containing non-finite values (`stat_boxplot()`).
ggsave("~/results/GTEx_plots/210806_ischemic_time_tissue_structures_GTEx.pdf")
## Saving 7 x 5 in image
## Warning: Removed 34 rows containing non-finite values (`stat_boxplot()`).
Plot the Mapping Rate: Ratio of total mapped reads to total reads
# Box plots of mapping rate (gtex.smmaprt) per sub-tissue, side by side per sex.
ggplot(gtex_coldata, aes(x = gtex.smtsd, y = gtex.smmaprt, fill = gtex.sex)) +
  geom_boxplot(position = position_dodge(width = 1)) +
  scale_fill_viridis(discrete = TRUE) +
  labs(
    x = "Tissue",
    y = "Mapping Rate",
    title = "Mapping Rate for GTEx tissue"
  ) +
  theme_classic() +
  theme(
    axis.ticks = element_blank(),
    axis.text.x = element_text(size = 3.5, angle = 90)
  )
ggsave(
  filename =
    "~/results/GTEx_plots/210806_mapping_rate_tissue_structures_GTEx.pdf"
)
## Saving 7 x 5 in image
Look at all the tissue/sex that have less than 10 samples
sex_sub_table[sex_sub_table$Freq < 10, ]
## gtex.smtsd gtex.sex Freq
## 24 Cervix - Ectocervix M 0
## 25 Cervix - Endocervix M 0
## 31 Fallopian Tube M 0
## 35 Kidney - Medulla M 3
## 41 Ovary M 0
## 52 Uterus M 0
## 53 Vagina M 0
## 61 Bladder F 7
## 78 Cervix - Ectocervix F 9
## 85 Fallopian Tube F 9
## 89 Kidney - Medulla F 1
## 98 Prostate F 0
## 104 Testis F 0
unique(sex_sub_table$gtex.smtsd[sex_sub_table$Freq < 10])
## [1] Cervix - Ectocervix Cervix - Endocervix Fallopian Tube
## [4] Kidney - Medulla Ovary Uterus
## [7] Vagina Bladder Prostate
## [10] Testis
## 54 Levels: Adipose - Subcutaneous ... Whole Blood
Remove all the tissues with fewer than 10 samples in either sex, except bladder
aka. sex-specific tissue or low numbers of samples of one sex: cervix-ectocervix, cervix-endocervix, fallopian tube, kidney-medulla, ovary, uterus, vagina, prostate, and testis.
# Sub-tissues where at least one sex has fewer than 10 samples.
low_n_tissues <- unique(sex_sub_table$gtex.smtsd[sex_sub_table$Freq < 10])
# Keep bladder. It is excluded by name rather than by its position (8) in the
# list above, so the right tissue is kept even if the order or content of the
# low-count list changes.
remove_tissues <- low_n_tissues[low_n_tissues != "Bladder"]
I also removed samples with a RIN score less than or equal to 5
# Drop samples from the excluded sub-tissues.
gtex_coldata_v2 <- gtex_coldata[!gtex_coldata$gtex.smtsd %in% remove_tissues, ]
# Keep samples with RIN > 5. which() drops comparisons that evaluate to NA, so
# a sample with a missing RIN is removed instead of becoming an all-NA row.
gtex_coldata_v2 <- gtex_coldata_v2[which(gtex_coldata_v2$gtex.smrin > 5), ]
# Remaining sample counts per sub-tissue and sex (columns selected by name).
as.data.frame(table(gtex_coldata_v2[c("gtex.smtsd", "gtex.sex")]))
## gtex.smtsd gtex.sex Freq
## 1 Adipose - Subcutaneous M 475
## 2 Adipose - Visceral (Omentum) M 383
## 3 Adrenal Gland M 168
## 4 Artery - Aorta M 292
## 5 Artery - Coronary M 152
## 6 Artery - Tibial M 466
## 7 Bladder M 14
## 8 Brain - Amygdala M 115
## 9 Brain - Anterior cingulate cortex (BA24) M 145
## 10 Brain - Caudate (basal ganglia) M 197
## 11 Brain - Cerebellar Hemisphere M 177
## 12 Brain - Cerebellum M 195
## 13 Brain - Cortex M 199
## 14 Brain - Frontal Cortex (BA9) M 158
## 15 Brain - Hippocampus M 156
## 16 Brain - Hypothalamus M 161
## 17 Brain - Nucleus accumbens (basal ganglia) M 192
## 18 Brain - Putamen (basal ganglia) M 167
## 19 Brain - Spinal cord (cervical c-1) M 107
## 20 Brain - Substantia nigra M 111
## 21 Breast - Mammary Tissue M 302
## 22 Cells - Cultured fibroblasts M 338
## 23 Cells - EBV-transformed lymphocytes M 119
## 24 Colon - Sigmoid M 251
## 25 Colon - Transverse M 277
## 26 Esophagus - Gastroesophageal Junction M 267
## 27 Esophagus - Mucosa M 407
## 28 Esophagus - Muscularis M 359
## 29 Heart - Atrial Appendage M 306
## 30 Heart - Left Ventricle M 335
## 31 Kidney - Cortex M 73
## 32 Liver M 176
## 33 Lung M 434
## 34 Minor Salivary Gland M 127
## 35 Muscle - Skeletal M 584
## 36 Nerve - Tibial M 444
## 37 Pancreas M 223
## 38 Pituitary M 217
## 39 Skin - Not Sun Exposed (Suprapubic) M 432
## 40 Skin - Sun Exposed (Lower leg) M 509
## 41 Small Intestine - Terminal Ileum M 123
## 42 Spleen M 161
## 43 Stomach M 240
## 44 Thyroid M 463
## 45 Whole Blood M 549
## 46 Adipose - Subcutaneous F 249
## 47 Adipose - Visceral (Omentum) F 179
## 48 Adrenal Gland F 106
## 49 Artery - Aorta F 160
## 50 Artery - Coronary F 100
## 51 Artery - Tibial F 220
## 52 Bladder F 7
## 53 Brain - Amygdala F 47
## 54 Brain - Anterior cingulate cortex (BA24) F 56
## 55 Brain - Caudate (basal ganglia) F 76
## 56 Brain - Cerebellar Hemisphere F 69
## 57 Brain - Cerebellum F 84
## 58 Brain - Cortex F 83
## 59 Brain - Frontal Cortex (BA9) F 63
## 60 Brain - Hippocampus F 64
## 61 Brain - Hypothalamus F 60
## 62 Brain - Nucleus accumbens (basal ganglia) F 70
## 63 Brain - Putamen (basal ganglia) F 54
## 64 Brain - Spinal cord (cervical c-1) F 64
## 65 Brain - Substantia nigra F 43
## 66 Breast - Mammary Tissue F 179
## 67 Cells - Cultured fibroblasts F 181
## 68 Cells - EBV-transformed lymphocytes F 70
## 69 Colon - Sigmoid F 138
## 70 Colon - Transverse F 155
## 71 Esophagus - Gastroesophageal Junction F 132
## 72 Esophagus - Mucosa F 217
## 73 Esophagus - Muscularis F 194
## 74 Heart - Atrial Appendage F 143
## 75 Heart - Left Ventricle F 153
## 76 Kidney - Cortex F 21
## 77 Liver F 75
## 78 Lung F 206
## 79 Minor Salivary Gland F 51
## 80 Muscle - Skeletal F 291
## 81 Nerve - Tibial F 214
## 82 Pancreas F 132
## 83 Pituitary F 84
## 84 Skin - Not Sun Exposed (Suprapubic) F 205
## 85 Skin - Sun Exposed (Lower leg) F 266
## 86 Small Intestine - Terminal Ileum F 70
## 87 Spleen F 93
## 88 Stomach F 144
## 89 Thyroid F 237
## 90 Whole Blood F 291
Combine the count data and metadata of the filtered samples
# Names of the per-project objects: "<project>_rse", built from the first
# column of gtex_proj_info.
name <- paste(gtex_proj_info[, 1], "rse", sep = "_")
# Extract every project's count assay and bind them column-wise in one step.
# This avoids re-copying the growing table on each iteration and also works
# when there is only one project (2:length(name) would count down to 1).
gtex_counts <- do.call(
  cbind,
  lapply(name, function(rse_name) as.data.frame(assay(get(rse_name))))
)
# Select the filtered samples by name, in metadata row order, so column k of
# gtex_counts_v2 is the sample in row k of gtex_coldata_v2. A logical %in%
# mask would keep the count-table order instead, which is not guaranteed to
# match. An id missing from the counts raises an error here.
ids <- gtex_coldata_v2$external_id
gtex_counts_v2 <- gtex_counts[, ids, drop = FALSE]
dim(gtex_counts_v2)
## [1] 63856 17542
dim(gtex_coldata_v2)
## [1] 17542 200
setdiff(gtex_coldata_v2$external_id, colnames(gtex_counts_v2))
## character(0)
gtex_coldata_v2$external_id[duplicated(gtex_coldata_v2$external_id)]
## character(0)
saveRDS(gtex_coldata_v2, "~/data/metadata_gtex_filter_samples.rds")
saveRDS(gtex_counts_v2, "~/data/counts_gtex_filter_samples.rds")
table(gtex_coldata_v2$gtex.smnabtcht)
##
## RNA Extraction from Paxgene-derived Lysate Plate Based
## 10737
## RNA isolation_PAXgene Blood RNA (Manual)
## 840
## RNA isolation_PAXgene Tissue miRNA
## 5257
## RNA isolation_Trizol Manual (Cell Pellet)
## 708
Covariates to look at in the PCA: ischemic time, RIN, age, and batch, in liver tissue only
unique(gtex_coldata_v2$gtex.smtsd)
## [1] "Adipose - Subcutaneous"
## [2] "Adipose - Visceral (Omentum)"
## [3] "Muscle - Skeletal"
## [4] "Artery - Tibial"
## [5] "Artery - Aorta"
## [6] "Artery - Coronary"
## [7] "Heart - Atrial Appendage"
## [8] "Heart - Left Ventricle"
## [9] "Breast - Mammary Tissue"
## [10] "Cells - Cultured fibroblasts"
## [11] "Skin - Sun Exposed (Lower leg)"
## [12] "Skin - Not Sun Exposed (Suprapubic)"
## [13] "Minor Salivary Gland"
## [14] "Brain - Hippocampus"
## [15] "Brain - Cortex"
## [16] "Brain - Putamen (basal ganglia)"
## [17] "Brain - Anterior cingulate cortex (BA24)"
## [18] "Brain - Cerebellar Hemisphere"
## [19] "Brain - Frontal Cortex (BA9)"
## [20] "Brain - Spinal cord (cervical c-1)"
## [21] "Brain - Substantia nigra"
## [22] "Brain - Nucleus accumbens (basal ganglia)"
## [23] "Brain - Hypothalamus"
## [24] "Brain - Cerebellum"
## [25] "Brain - Caudate (basal ganglia)"
## [26] "Brain - Amygdala"
## [27] "Adrenal Gland"
## [28] "Thyroid"
## [29] "Lung"
## [30] "Spleen"
## [31] "Pancreas"
## [32] "Esophagus - Muscularis"
## [33] "Esophagus - Mucosa"
## [34] "Esophagus - Gastroesophageal Junction"
## [35] "Stomach"
## [36] "Colon - Transverse"
## [37] "Colon - Sigmoid"
## [38] "Small Intestine - Terminal Ileum"
## [39] "Nerve - Tibial"
## [40] "Pituitary"
## [41] "Whole Blood"
## [42] "Cells - EBV-transformed lymphocytes"
## [43] "Liver"
## [44] "Kidney - Cortex"
## [45] "Bladder"
# Liver samples only.
col_data_sub <- gtex_coldata_v2[gtex_coldata_v2$gtex.smtsd == "Liver", ]
ids <- col_data_sub$external_id
# Index the counts by sample id, in metadata row order, so the rows of
# pca_sub$x line up with the rows of col_data_sub. The plots below colour the
# PCA points positionally from col_data_sub, so a mismatch would silently
# mislabel samples.
counts_sub <- gtex_counts_v2[, ids, drop = FALSE]
# PCA with samples as rows, on the counts as stored (no transformation or
# scaling is applied here).
pca_sub <- prcomp(t(counts_sub))
the lighter the color the higher the value.
# Colour ramp from black (lowest value) to yellow (highest value). colorRamp()
# returns RGB components on a 0-255 scale.
fun <- colorRamp(c("black", "#FDE725FF"))

# RIN, rescaled to [0, 1] (`mpg` is just the name of the rescaled covariate).
mpg <- with(
  col_data_sub,
  (gtex.smrin - min(gtex.smrin)) / diff(range(gtex.smrin))
)
# maxColorValue must match the ramp's 0-255 scale; 256 darkened every colour
# slightly.
mycolors <- rgb(fun(mpg), maxColorValue = 255)
par(cex = 1.0, cex.axis = 0.8, cex.main = 0.8)
pairs(
  pca_sub$x[, 1:5],
  col = mycolors,
  main = "Principal components analysis bi-plot\nPCs 1-5",
  pch = 16
)

# Ischemic time, rescaled to [0, 1] and coloured the same way.
mpg <- with(
  col_data_sub,
  (gtex.smtsisch - min(gtex.smtsisch)) / diff(range(gtex.smtsisch))
)
mycolors <- rgb(fun(mpg), maxColorValue = 255)
par(cex = 1.0, cex.axis = 0.8, cex.main = 0.8)
pairs(
  pca_sub$x[, 1:5],
  col = mycolors,
  main = "Principal components analysis bi-plot\nPCs 1-5",
  pch = 16
)
cor(col_data_sub$gtex.smtsisch, col_data_sub$gtex.smrin, method = "spearman")
## [1] -0.5652864
somewhat of a relationship but not strong.
# One colour per age bracket; older brackets get lighter colours.
age_palette <- c(
  "20-29" = "#440154FF",
  "30-39" = "#414487FF",
  "40-49" = "#2A788EFF",
  "50-59" = "#22A884FF",
  "60-69" = "#7AD151FF",
  "70-79" = "#FDE725FF"
)
age_bin <- col_data_sub$gtex.age
palette_idx <- match(age_bin, names(age_palette))
# Values that are not one of the brackets above are passed through unchanged.
age <- ifelse(is.na(palette_idx), age_bin, unname(age_palette[palette_idx]))
par(cex = 1.0, cex.axis = 0.8, cex.main = 0.8)
pairs(
  pca_sub$x[, 1:5],
  pch = 16,
  col = age,
  main = "Principal components analysis bi-plot\nPCs 1-5"
)
some grouping by age but it is mostly RIN and Ischemic time; using all
the investigated covariates for downstream models
#plots for age and batches
*Worried about the low RIN and high autolysis scores of the kidney samples
*Worried about the quality of the alignment of whole blood
NA
Done in script
Location of final scripts:
"/home/rstudio/script"
Location of data produced:
"/home/rstudio/results/GTEx_plots/"
Dates when operations were done:
210810 and again on 220803 for project
sessionInfo()
## R version 4.2.2 (2022-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.1 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] RColorBrewer_1.1-3 viridis_0.6.2
## [3] viridisLite_0.4.1 forcats_0.5.2
## [5] stringr_1.5.0 purrr_1.0.0
## [7] readr_2.1.3 tidyr_1.2.1
## [9] tibble_3.1.8 ggplot2_3.4.0
## [11] tidyverse_1.3.2 dbplyr_2.2.1
## [13] dplyr_1.0.10 rlang_1.0.6
## [15] recount3_1.8.0 SummarizedExperiment_1.28.0
## [17] Biobase_2.58.0 GenomicRanges_1.50.2
## [19] GenomeInfoDb_1.34.4 IRanges_2.32.0
## [21] S4Vectors_0.36.1 BiocGenerics_0.44.0
## [23] MatrixGenerics_1.10.0 matrixStats_0.63.0
##
## loaded via a namespace (and not attached):
## [1] googledrive_2.0.0 colorspace_2.0-3 rjson_0.2.21
## [4] ellipsis_0.3.2 XVector_0.38.0 fs_1.5.2
## [7] rstudioapi_0.14 farver_2.1.1 bit64_4.0.5
## [10] fansi_1.0.3 lubridate_1.9.0 xml2_1.3.3
## [13] codetools_0.2-18 R.methodsS3_1.8.2 cachem_1.0.6
## [16] knitr_1.41 jsonlite_1.8.4 Rsamtools_2.14.0
## [19] broom_1.0.2 R.oo_1.25.0 compiler_4.2.2
## [22] httr_1.4.4 backports_1.4.1 assertthat_0.2.1
## [25] Matrix_1.5-1 fastmap_1.1.0 gargle_1.2.1
## [28] cli_3.5.0 htmltools_0.5.4 tools_4.2.2
## [31] gtable_0.3.1 glue_1.6.2 GenomeInfoDbData_1.2.9
## [34] rappdirs_0.3.3 Rcpp_1.0.9 cellranger_1.1.0
## [37] jquerylib_0.1.4 vctrs_0.5.1 Biostrings_2.66.0
## [40] rtracklayer_1.58.0 xfun_0.36 rvest_1.0.3
## [43] timechange_0.1.1 lifecycle_1.0.3 restfulr_0.0.15
## [46] XML_3.99-0.13 googlesheets4_1.0.1 zlibbioc_1.44.0
## [49] scales_1.2.1 ragg_1.2.4 hms_1.1.2
## [52] parallel_4.2.2 yaml_2.3.6 curl_4.3.3
## [55] memoise_2.0.1 gridExtra_2.3 sass_0.4.4
## [58] stringi_1.7.8 RSQLite_2.2.20 highr_0.10
## [61] BiocIO_1.8.0 filelock_1.0.2 BiocParallel_1.32.5
## [64] pkgconfig_2.0.3 systemfonts_1.0.4 bitops_1.0-7
## [67] evaluate_0.19 lattice_0.20-45 GenomicAlignments_1.34.0
## [70] labeling_0.4.2 bit_4.0.5 tidyselect_1.2.0
## [73] magrittr_2.0.3 R6_2.5.1 generics_0.1.3
## [76] DelayedArray_0.24.0 DBI_1.1.3 pillar_1.8.1
## [79] haven_2.5.1 withr_2.5.0 RCurl_1.98-1.9
## [82] modelr_0.1.10 crayon_1.5.2 utf8_1.2.2
## [85] BiocFileCache_2.6.0 tzdb_0.3.0 rmarkdown_2.19
## [88] grid_4.2.2 readxl_1.4.1 data.table_1.14.6
## [91] blob_1.2.3 reprex_2.0.2 digest_0.6.31
## [94] textshaping_0.3.6 R.utils_2.12.2 munsell_0.5.0
## [97] bslib_0.4.2 sessioninfo_1.2.2